Exporatory Data Analysis of NEFSC Bottom Trawl Survey Data

Exploration of spatial and temporal patterns in abundance, and bodymass of fishes from the Northeast groundfish survey.

# Do some formatting
trawldat <- weights_20 %>% 
  mutate(
    id = as.character(id),
    season = ifelse(season == "SPRING", "Spring", "Fall"),
    season = factor(season, levels = c("Spring", "Fall"))
  )

# Run Summary Functions
ann_means <- ss_annual_summary(trawldat)
seasonals <- ss_seasonal_summary(trawldat)

# bind them so you can facet
summs <- bind_rows(ann_means, seasonals) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))


#drop haddock to see if that changes the bump
haddock_ann  <- ss_annual_summary(filter(trawldat, comname != "haddock"))
haddock_seas <- ss_seasonal_summary(filter(trawldat, comname != "haddock"))
no_haddock <- bind_rows(haddock_ann, haddock_seas) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))

Spatial Patterns

For large regions like Georges Bank and the Gulf of Maine, what kind of patterns are we seeing.

# Load the strata
survey_strata <- read_sf(str_c(res_path, "Shapefiles/BottomTrawlStrata/BTS_Strata.shp"))  %>% 
  clean_names() %>% 
  filter(strata >= 01010 ,
         strata <= 01760,
         strata != 1310,
         strata != 1320,
         strata != 1330,
         strata != 1350,
         strata != 1410,
         strata != 1420,
         strata != 1490) 


# Key to which strata = which regions
strata_key <- list(
  "Georges Bank"          = as.character(13:23),
  "Gulf of Maine"         = as.character(24:40),
  "Southern New England"  = str_pad(as.character(1:12), width = 2, pad = "0", side = "left"),
  "Mid-Atlantic Bight"    = as.character(61:76))


# Assign Areas
survey_strata <- survey_strata %>% 
  mutate(
    strata = str_pad(strata, width = 5, pad = "0", side = "left"),
    strata_num = str_sub(strata, 3, 4),
    area = case_when(
      strata_num %in% strata_key$`Georges Bank` ~ "Georges Bank",
      strata_num %in% strata_key$`Gulf of Maine` ~ "Gulf of Maine",
      strata_num %in% strata_key$`Southern New England` ~ "Southern New England",
      strata_num %in% strata_key$`Mid-Atlantic Bight` ~ "Mid-Atlantic Bight",
    TRUE ~ "Outside Major Study Areas"
  )) %>% 
  select(finstr_id, strata, strata_num, area, a2, str2, set, stratuma, str3, geometry)

# Load new england
new_england <- ne_states("united states of america") %>% st_as_sf(crs = 4326) 
canada <- ne_states("canada") %>% st_as_sf(crs = 4326) 


# Make trawl data an sf dataset
trawl_sf <- trawldat %>% st_as_sf(coords = c("decdeg_beglon", "decdeg_beglat"), crs = 4326)

Trawl Regions

# Plot to check
ggplot() +
  geom_sf(data = new_england) +
  geom_sf(data = canada) +
  geom_sf(data = survey_strata, aes(fill = area)) +
  coord_sf(xlim = c(-77, -65.5), ylim = c(34, 45.75), expand = FALSE) +
  guides(fill = guide_legend(nrow = 2)) +
  theme_bw() +
  theme(legend.position = "bottom", legend.title = element_blank())

Regional Summaries

# Just Area, all seasona
area_summs <- trawldat %>% 
  group_by(area) %>% 
  summarise(
    season = "Spring + Fall",
    lw_biomass_kg = sum(sum_weight_kg, na.rm = T),
    n_stations = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length = weighted.mean(length, weights = numlen_adj)
  )

# Area x Season
seas_area <- trawldat %>% 
  group_by(area, season) %>% 
  summarise(
    lw_biomass_kg = sum(sum_weight_kg, na.rm = T),
    n_stations = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length = weighted.mean(length, weights = numlen_adj)
  )


# Combine those two
summs_combined <- bind_rows(area_summs, seas_area) %>% 
  mutate(season = factor(season, levels = c("Spring", "Fall", "Spring + Fall")))

summs_combined %>% 
  mutate_if(is.numeric,round, 2) %>% 
  arrange(area,season) %>% 
  knitr::kable()
area season lw_biomass_kg n_stations lw_biomass_per_station mean_ind_bodymass mean_ind_length
GB Spring 4670398 3005 1554.21 0.88 40.14
GB Fall 10430280 3129 3333.42 0.74 37.59
GB Spring + Fall 15100678 6134 2461.80 0.81 38.79
GoM Spring 3046226 3636 837.80 0.65 34.95
GoM Fall 8135976 3747 2171.33 0.73 36.40
GoM Spring + Fall 11182202 7383 1514.59 0.69 35.72
MAB Spring 6591618 2550 2584.95 0.84 43.86
MAB Fall 1299542 2497 520.44 0.64 25.53
MAB Spring + Fall 7891159 5047 1563.53 0.78 38.08
SNE Spring 4125352 2868 1438.41 0.57 36.42
SNE Fall 2130115 2751 774.31 0.48 32.70
SNE Spring + Fall 6255467 5619 1113.27 0.54 34.96
# Year x Area
area_summs_y <- trawldat %>% 
  group_by(est_year, area) %>% 
  summarise(
    season = "Spring + Fall",
    lw_biomass_kg = sum(sum_weight_kg, na.rm = T),
    n_stations = n_distinct(id),
    lw_biomass_per_station = lw_biomass_kg / n_stations,
    mean_ind_bodymass = weighted.mean(ind_weight_kg, weights = numlen_adj),
    mean_ind_length = weighted.mean(length, weights = numlen_adj)
  )

Total Biomass

area_summs_y %>% 
  ggplot(aes(est_year, lw_biomass_kg)) +
    geom_line() +
    facet_wrap(~area, ncol = 2) +
    scale_y_continuous(labels = scales::comma_format()) +
    labs(x = "", y = "Total Biomass (kg)")

CPUE

area_summs_y %>% 
  ggplot(aes(est_year, lw_biomass_per_station)) +
    geom_line() +
    facet_wrap(~area, ncol = 2) +
    labs(x = "", y = "Adjusted Biomass per Station (kg)")

Comparisons to Older Data

Concerns have been raised that the datasets obtained through the NEFSC are inconsistent in some areas over time. The following plots seek to identify differences in a dataset obtained in 2016 from what we currently are exploring with the 2020 dataset.

Each file was processed for size-spectra analysis using the same processing steps. This includes the same species codes, the same abundance and stratification adjustments, and the same L-W derived biomasses.

weights_16 <- read_csv(here::here("data/ss_prepped_data/survdat_2016_ss.csv"),
                       col_types = cols(),
                       guess_max = 1e5)
weights_19 <- read_csv(here::here("data/ss_prepped_data/survdat_2019_ss.csv"),
                       col_types = cols(),
                       guess_max = 1e5)
weights_20 <- read_csv(here::here("data/ss_prepped_data/survdat_2020_ss.csv"),
                       col_types = cols(),
                       guess_max = 1e5)

# run summaries
summ_16 <- ss_regional_differences(weights_16) %>% mutate(source = "2016")
summ_19 <- ss_regional_differences(weights_19) %>% mutate(source = "2019")
summ_20 <- ss_regional_differences(weights_20) %>% mutate(source = "2020")
reg_summs <- bind_rows(list(summ_16, summ_19, summ_20))
# Total Biomass
p1 <- reg_summs %>% 
  ggplot(aes(est_year, lw_biomass_kg, color = source)) +
  geom_line(show.legend = F) +
  scale_y_continuous(labels = scales::comma_format()) +
  facet_wrap(~area, ncol = 1, scales = "free") +
  labs(x = "", y = "Total Biomass \n (L-W Regressions)")

# Total Biomass - FSCS
p2 <- reg_summs %>% 
  ggplot(aes(est_year, fscs_biomass, color = source)) +
  geom_line() +
  scale_y_continuous(labels = scales::comma_format()) +
  facet_wrap(~area, ncol = 1) +
  labs(x = "", y = "Total Biomass \n (FSCS Haul Weights)")

# effort
p3 <- reg_summs %>% 
  ggplot(aes(est_year, n_stations, color = source)) +
  geom_line(show.legend = F) +
  scale_y_continuous(labels = scales::comma_format()) +
  facet_wrap(~area, ncol = 1) +
  labs(x = "", y = "Effort (haul count)")

# Species 
p4 <- reg_summs %>% 
  ggplot(aes(est_year, n_species, color = source)) +
  geom_line(show.legend = F) +
  scale_y_continuous(labels = scales::comma_format()) +
  facet_wrap(~area, ncol = 1) +
  labs(x = "", y = "Distinct Species")

p1 + p2 + p3 + p4

 

A work by Adam A. Kemberling

Akemberling@gmri.org